{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Utility graph plot matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "def draw_graph(G, node_names={}, nodes_label=[], node_size=900):\n",
    "    pos_nodes = nx.spring_layout(G)\n",
    "    \n",
    "    col = {0:\"steelblue\",1:\"red\",2:\"green\"}\n",
    "    \n",
    "    colors = [col[x] for x in nodes_label]\n",
    "    \n",
    "    nx.draw(G, pos_nodes, with_labels=True, node_color=colors, node_size=node_size, edge_color='gray', \n",
    "            arrowsize=30)\n",
    "    \n",
    "    \n",
    "    \n",
    "    pos_attrs = {}\n",
    "    for node, coords in pos_nodes.items():\n",
    "        pos_attrs[node] = (coords[0], coords[1] + 0.08)\n",
    "        \n",
    "    \n",
    "    plt.axis('off')\n",
    "    axis = plt.gca()\n",
    "    axis.set_xlim([1.2*x for x in axis.get_xlim()])\n",
    "    axis.set_ylim([1.2*y for y in axis.get_ylim()])\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Label propagation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import networkx as nx\n",
    "\n",
    "G = nx.barbell_graph(m1=3, m2=2)\n",
    "nodes_label = [0 for x in range(len(G.nodes()))]\n",
    "nodes_label[0] = 1\n",
    "nodes_label[6] = 2\n",
    "draw_graph(G, nodes_label=nodes_label, node_size=1200)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Degree matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy.linalg import inv\n",
    "\n",
    "D = [G.degree(n) for n in G.nodes()]\n",
    "D = np.diag(D)\n",
    "D"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Proximity matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "A = inv(D)*nx.to_numpy_matrix(G)\n",
    "A"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Label propagation implemenation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import networkx as nx\n",
    "from numpy.linalg import inv\n",
    "from abc import ABCMeta, abstractmethod\n",
    "from sklearn.base import BaseEstimator, ClassifierMixin\n",
    "from sklearn.utils.multiclass import check_classification_targets\n",
    "from sklearn.utils.validation import check_is_fitted, _deprecate_positional_args\n",
    "\n",
    "class GraphLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):\n",
    "    \"\"\"Graph label propagation module.\n",
    "    Parameters\n",
    "    ----------\n",
    "    max_iter : int, default=30\n",
    "        Change maximum number of iterations allowed.\n",
    "    tol : float, default=1e-3\n",
    "        Convergence tolerance: threshold to consider the system at steady\n",
    "        state.\n",
    "    \"\"\"\n",
    "\n",
    "    @_deprecate_positional_args\n",
    "    def __init__(self, max_iter=30, tol=1e-3):\n",
    "\n",
    "        self.max_iter = max_iter\n",
    "        self.tol = tol\n",
    "\n",
    "    def predict(self, X):\n",
    "        \"\"\"Performs inductive inference across the model.\n",
    "        Parameters\n",
    "        ----------\n",
    "        X : A networkx array.\n",
    "            The data matrix.\n",
    "        Returns\n",
    "        -------\n",
    "        y : ndarray of shape (n_samples,)\n",
    "            Predictions for input data.\n",
    "        \"\"\"\n",
    "        probas = self.predict_proba(X)\n",
    "        return self.classes_[np.argmax(probas, axis=1)].ravel()\n",
    "\n",
    "    def predict_proba(self, X):\n",
    "        \"\"\"Predict probability for each possible outcome.\n",
    "        Compute the probability estimates for each single node in X\n",
    "        and each possible outcome seen during training (categorical\n",
    "        distribution).\n",
    "        Parameters\n",
    "        ----------\n",
    "        X : A networkx array.\n",
    "        Returns\n",
    "        -------\n",
    "        probabilities : ndarray of shape (n_samples, n_classes)\n",
    "            Normalized probability distributions across\n",
    "            class labels.\n",
    "        \"\"\"\n",
    "        check_is_fitted(self)\n",
    "        \n",
    "        return self.label_distributions_\n",
    "    \n",
    "    def _validate_data(self, X, y):\n",
    "        if not isinstance(X, nx.Graph):\n",
    "            raise ValueError(\"Input should be a networkX graph\")\n",
    "        if not len(y) == len(X.nodes()):\n",
    "            raise ValueError(\"Label data input shape should be equal to the number of nodes in the graph\")\n",
    "        return X, y\n",
    "    \n",
    "    @staticmethod\n",
    "    def build_label(x,classes):\n",
    "        tmp = np.zeros((classes))\n",
    "        tmp[x] = 1\n",
    "        return tmp\n",
    "    \n",
    "    def fit(self, X, y):\n",
    "        \"\"\"Fit a semi-supervised label propagation model based\n",
    "        on the input graph G and corresponding label matrix y with a dedicated marker value for\n",
    "        unlabeled samples.\n",
    "        Parameters\n",
    "        ----------\n",
    "        X : A networkX array.\n",
    "        y : array-like of shape (n_samples,)\n",
    "            `n_labeled_samples` (unlabeled points are marked as -1)\n",
    "            All unlabeled samples will be transductively assigned labels.\n",
    "        Returns\n",
    "        -------\n",
    "        self : object\n",
    "        \"\"\"\n",
    "        X, y = self._validate_data(X, y)\n",
    "        self.X_ = X\n",
    "        check_classification_targets(y)\n",
    "\n",
    "        D = [X.degree(n) for n in X.nodes()]\n",
    "        D = np.diag(D)\n",
    "        \n",
    "        # label construction\n",
    "        # construct a categorical distribution for classification only\n",
    "        unlabeled_index = np.where(y==-1)[0]\n",
    "        labeled_index = np.where(y!=-1)[0]\n",
    "        unique_classes = np.unique(y[labeled_index])\n",
    "        \n",
    "        self.classes_ = unique_classes\n",
    "        \n",
    "        Y0 = np.array([self.build_label(y[x], len(unique_classes)) \n",
    "                                 if x in labeled_index else np.zeros(len(unique_classes)) for x in range(len(y))])\n",
    "        \n",
    "        A = inv(D)*nx.to_numpy_matrix(G)\n",
    "        Y_prev = Y0\n",
    "        it = 0\n",
    "        c_tool = 10\n",
    "        \n",
    "        while it < self.max_iter & c_tool > self.tol:\n",
    "            Y = A*Y_prev\n",
    "            #force labeled nodes\n",
    "            Y[labeled_index] = Y0[labeled_index]\n",
    "            \n",
    "            it +=1\n",
    "            c_tol = np.sum(np.abs(Y-Y_prev))\n",
    "            \n",
    "            Y_prev = Y\n",
    "            \n",
    "        self.label_distributions_ = Y\n",
    "        return self"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Label propagation execution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "glp = GraphLabelPropagation()\n",
    "y = np.array([-1 for x in range(len(G.nodes()))])\n",
    "y[0] = 1\n",
    "y[6] = 0\n",
    "glp.fit(G,y)\n",
    "tmp = glp.predict(G)\n",
    "print(glp.predict_proba(G))\n",
    "\n",
    "draw_graph(G, nodes_label=tmp+1, node_size=1200)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Label spreading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import networkx as nx\n",
    "\n",
    "G = nx.barbell_graph(m1=3, m2=2)\n",
    "nodes_label = [0 for x in range(len(G.nodes()))]\n",
    "nodes_label[0] = 1\n",
    "nodes_label[6] = 2\n",
    "draw_graph(G, nodes_label=nodes_label, node_size=1200)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Degree matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy.linalg import inv\n",
    "\n",
    "D = [G.degree(n) for n in G.nodes()]\n",
    "D = np.diag(D)\n",
    "D"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Normalized graph Laplacian matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.linalg import fractional_matrix_power\n",
    "D_inv = fractional_matrix_power(D, -0.5)\n",
    "L = D_inv*nx.to_numpy_matrix(G)*D_inv\n",
    "L"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Label spreading implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import networkx as nx\n",
    "from sklearn.preprocessing import normalize\n",
    "from scipy.linalg import fractional_matrix_power\n",
    "from sklearn.utils.multiclass import check_classification_targets\n",
    "\n",
    "class GraphLabelSpreading(GraphLabelPropagation):\n",
    "    \"\"\"Graph label propagation module.\n",
    "    Parameters\n",
    "    ----------\n",
    "    max_iter : int, default=30\n",
    "        Change maximum number of iterations allowed.\n",
    "    tol : float, default=1e-3\n",
    "        Convergence tolerance: threshold to consider the system at steady\n",
    "        state.\n",
    "    \"\"\"\n",
    "\n",
    "    @_deprecate_positional_args\n",
    "    def __init__(self, max_iter=30, tol=1e-3, alpha=0.6):\n",
    "\n",
    "        self.alpha = alpha\n",
    "        super().__init__(max_iter, tol)\n",
    "    \n",
    "    def fit(self, X, y):\n",
    "        \"\"\"Fit a semi-supervised label propagation model based\n",
    "        on the input graph G and corresponding label matrix y with a dedicated marker value for\n",
    "        unlabeled samples.\n",
    "        Parameters\n",
    "        ----------\n",
    "        X : A networkX array.\n",
    "        y : array-like of shape (n_samples,)\n",
    "            `n_labeled_samples` (unlabeled points are marked as -1)\n",
    "            All unlabeled samples will be transductively assigned labels.\n",
    "        Returns\n",
    "        -------\n",
    "        self : object\n",
    "        \"\"\"\n",
    "        X, y = self._validate_data(X, y)\n",
    "        self.X_ = X\n",
    "        check_classification_targets(y)\n",
    "\n",
    "        D = [X.degree(n) for n in X.nodes()]\n",
    "        D = np.diag(D)\n",
    "        D_inv = np.matrix(fractional_matrix_power(D,-0.5))\n",
    "        L = D_inv*nx.to_numpy_matrix(G)*D_inv\n",
    "        \n",
    "        # label construction\n",
    "        # construct a categorical distribution for classification only\n",
    "        unlabeled_index = np.where(y==-1)[0]\n",
    "        labeled_index = np.where(y!=-1)[0]\n",
    "        unique_classes = np.unique(y[labeled_index])\n",
    "        \n",
    "        self.classes_ = unique_classes\n",
    "        \n",
    "        Y0 = np.array([self.build_label(y[x], len(unique_classes)) \n",
    "                                 if x in labeled_index else np.zeros(len(unique_classes)) for x in range(len(y))])\n",
    "        \n",
    "        Y_prev = Y0\n",
    "        it = 0\n",
    "        c_tool = 10\n",
    "        \n",
    "        while it < self.max_iter & c_tool > self.tol:\n",
    "            Y = self.alpha*(L*Y_prev)+((1-self.alpha)*Y0)\n",
    "\n",
    "            it +=1\n",
    "            c_tol = np.sum(np.abs(Y-Y_prev))\n",
    "            Y_prev = Y\n",
    "        self.label_distributions_ = Y\n",
    "        return self"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Label Spreading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gls = GraphLabelSpreading(max_iter=1000)\n",
    "y = np.array([-1 for x in range(len(G.nodes()))])\n",
    "y[0] = 1\n",
    "y[6] = 0\n",
    "gls.fit(G,y)\n",
    "tmp = gls.predict(G)\n",
    "print(gls.predict_proba(G))\n",
    "draw_graph(G, nodes_label=tmp+1, node_size=1200)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
